packages = c('sf', 'tidyverse','readxl','maptools', 'raster','spatstat', 'tmap')
for (p in packages){
if(!require(p, character.only = T)){
install.packages(p)
}
library(p,character.only = T)
}
data_name_list <- list(
"Standar Kelurahan Data Corona (31 Maret 2020 Pukul 08.00)",
"Standar Kelurahan Data Corona (30 April 2020 Pukul 09.00)",
"Standar Kelurahan Data Corona (31 MEI 2020 Pukul 09.00)",
"Standar Kelurahan Data Corona (30 Juni 2020 Pukul 09.00)",
"Standar Kelurahan Data Corona (31 Juli 2020 Pukul 09.00)",
"Standar Kelurahan Data Corona (31 Agustus 2020 Pukul 10.00)",
"Standar Kelurahan Data Corona (30 September 2020 Pukul 10.00)",
"Standar Kelurahan Data Corona (31 Oktober 2020 Pukul 10.00)",
"Standar Kelurahan Data Corona (30 November 2020 Pukul 10.00)",
"Standar Kelurahan Data Corona (31 Desember 2020 Pukul 10.00)",
"Standar Kelurahan Data Corona (30 Januari 2021 Pukul 10.00)",
"Standar Kelurahan Data Corona (28 Februari 2021 Pukul 10.00)",
"Standar Kelurahan Data Corona (31 Maret 2021 Pukul 10.00)",
"Standar Kelurahan Data Corona (30 April 2021 Pukul 10.00)",
"Standar Kelurahan Data Corona (31 Mei 2021 Pukul 10.00)",
"Standar Kelurahan Data Corona (30 Juni 2021 Pukul 10.00)",
"Standar Kelurahan Data Corona (31 Juli 2021 Pukul 10.00)")
data_refer_list <- list("03_2020", "04_2020", "05_2020", "06_2020", "07_2020", "08_2020", "09_2020", "10_2020", "11_2020", "12_2020", "01_2021", "02_2021", "03_2021", "04_2021", "05_2021", "06_2021", "07_2021")
for (x in 1:length(data_name_list)) {
path = paste("data/",data_name_list[x],".xlsx", sep = "")
df= read_excel(path, sheet = "data")
# from March 2020 to June 2020 raw data excels have double "ID_KEL"
if (x < 5) {
df$ID_KEL...2 <- NULL
names(df)[names(df) == 'ID_KEL...1'] <- 'ID_KEL'
}
# from July 2020 onward raw data excels have double "Meninggal"
else if (x == 5) {
df$Meninggal...21 <- NULL
names(df)[names(df) == 'Meninggal...26'] <- 'Meninggal'
}
else if (x == 6) {
df$Meninggal...23 <- NULL
names(df)[names(df) == 'Meninggal...28'] <- 'Meninggal'
}
else if (x == 7) {
df$Meninggal...24 <- NULL
names(df)[names(df) == 'Meninggal...29'] <- 'Meninggal'
}
else if (x == 8 | x == 9 | x == 10) {
df$Meninggal...25 <- NULL
names(df)[names(df) == 'Meninggal...30'] <- 'Meninggal'
}
else if (x > 10) {
df$Meninggal...26 <- NULL
names(df)[names(df) == 'Meninggal...31'] <- 'Meninggal'
}
df <- df[,c("ID_KEL", "Nama_provinsi", "nama_kota", "nama_kecamatan", "nama_kelurahan", "POSITIF", "Meninggal")]
df$'month' <- data_refer_list[x]
df <- df[-c(1), ]
df <- df[(df$Nama_provinsi=="DKI JAKARTA"),]
df_name = paste("df_", data_refer_list[x], sep = "")
assign(df_name, df)
}
temp_bind_df <- rbind(df_03_2020,df_04_2020)
temp_bind_df <- rbind(temp_bind_df,df_05_2020)
temp_bind_df <- rbind(temp_bind_df,df_06_2020)
temp_bind_df <- rbind(temp_bind_df,df_07_2020)
temp_bind_df <- rbind(temp_bind_df,df_08_2020)
temp_bind_df <- rbind(temp_bind_df,df_09_2020)
temp_bind_df <- rbind(temp_bind_df,df_10_2020)
temp_bind_df <- rbind(temp_bind_df,df_11_2020)
temp_bind_df <- rbind(temp_bind_df,df_12_2020)
temp_bind_df <- rbind(temp_bind_df,df_01_2021)
temp_bind_df <- rbind(temp_bind_df,df_02_2021)
temp_bind_df <- rbind(temp_bind_df,df_03_2021)
temp_bind_df <- rbind(temp_bind_df,df_04_2021)
temp_bind_df <- rbind(temp_bind_df,df_05_2021)
temp_bind_df <- rbind(temp_bind_df,df_06_2021)
binded_df <- rbind(temp_bind_df,df_07_2021)
DJ = st_read(dsn = "data",
layer = "BATAS_DESA_DESEMBER_2019_DUKCAPIL_DKI_JAKARTA")
Reading layer `BATAS_DESA_DESEMBER_2019_DUKCAPIL_DKI_JAKARTA' from data source `C:\yiling-yu\IS415_Blog\_take_home_exercises\Take-home Exercise 1\data'
using driver `ESRI Shapefile'
Simple feature collection with 269 features and 161 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: 106.3831 ymin: -6.370815 xmax: 106.9728 ymax: -5.184322
Geodetic CRS: WGS 84
#EPSG for DGN95 / Indonesia TM-3 zone 54.1: 23845
DJ_sf <- st_transform(DJ, crs = 23845)
st_geometry(DJ_sf)
Geometry set for 269 features
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -3691981 ymin: 663887.8 xmax: -3606237 ymax: 815440.9
Projected CRS: DGN95 / Indonesia TM-3 zone 54.1
First 5 geometries:
tmap_mode('view')
tm_shape(DJ_sf) +
tm_polygons()
DJ_sf <- DJ_sf[!(DJ_sf$KAB_KOTA == "KEPULAUAN SERIBU"),]
DJ_sf <- DJ_sf %>%
drop_na()
tmap_mode('plot')
tm_shape(DJ_sf) +
tm_polygons()
DJ_sf = DJ_sf[,c(1:9)]
#DJ_sf$geometry <- NULL
nama_kelurahan_values <- unique(binded_df[c("nama_kelurahan")])
DESA_KELUR_values <- unique(DJ_sf[c("DESA_KELUR")]) %>%
st_set_geometry(NULL)
no_match <- list()
for (x in nama_kelurahan_values$nama_kelurahan) {
if (x %in% DESA_KELUR_values$DESA_KELUR == FALSE){
no_match <- append(no_match,x)
}
}
no_match
[[1]]
[1] "PINANG RANTI"
[[2]]
[1] "BALE KAMBANG"
[[3]]
[1] "PAL MERIAM"
[[4]]
[1] "JATI PULO"
[[5]]
[1] "KALI BARU"
[[6]]
[1] "RAWA JATI"
[[7]]
[1] "KERENDANG"
[[8]]
[1] "KAMPUNG TENGAH"
[[9]]
[1] "KRAMAT JATI"
[[10]]
[1] "HALIM PERDANA KUSUMAH"
[[11]]
[1] "P. HARAPAN"
[[12]]
[1] "P. KELAPA"
[[13]]
[1] "P. PANGGANG"
[[14]]
[1] "P. PARI"
[[15]]
[1] "P. TIDUNG"
[[16]]
[1] "UNTUNG JAWA"
[[17]]
[1] "PULAU HARAPAN"
[[18]]
[1] "PULAU KELAPA"
[[19]]
[1] "PULAU PANGGANG"
[[20]]
[1] "PULAU PARI"
[[21]]
[1] "PULAU TIDUNG"
[[22]]
[1] "PULAU UNTUNG JAWA"
*found many sub-district names not matched
for (x in no_match) {
if (x == "KERENDANG" ){
binded_df$nama_kelurahan[binded_df$nama_kelurahan == x] <- "KRENDANG"
}
else if (x == "KAMPUNG TENGAH" ){
binded_df$nama_kelurahan[binded_df$nama_kelurahan == x] <- "TENGAH"
}
else if (x == "HALIM PERDANA KUSUMAH" ){
binded_df$nama_kelurahan[binded_df$nama_kelurahan == x] <- "HALIM PERDANA KUSUMA"
}
else if (x == "P. HARAPAN" | x == "P. KELAPA" | x == "P. PANGGANG" | x == "P. PARI" | x == "P. TIDUNG" | x == "UNTUNG JAWA" | x == "PULAU HARAPAN" | x == "PULAU KELAPA" | x == "PULAU PANGGANG" | x == "PULAU PARI"| x == "PULAU TIDUNG" | x == "PULAU UNTUNG JAWA"){
binded_df<-binded_df[!(binded_df$nama_kelurahan == x),]
}
else {
binded_df$nama_kelurahan[binded_df$nama_kelurahan == x] <- str_replace_all(string=x, pattern=" ", repl="")
}
}
nama_kelurahan_values <- unique(binded_df[c("nama_kelurahan")])
DESA_KELUR_values <- unique(DJ_sf[c("DESA_KELUR")]) %>%
st_set_geometry(NULL)
no_match <- list()
for (x in nama_kelurahan_values$nama_kelurahan) {
if (x %in% DESA_KELUR_values$DESA_KELUR == FALSE){
no_match <- append(no_match,x)
}
}
no_match
list()
*all sub-district names matched
DJ_covid <- left_join(DJ_sf, binded_df,
by = c("DESA_KELUR" = "nama_kelurahan"))
colSums(is.na(DJ_covid))
OBJECT_ID KODE_DESA DESA KODE
0 0 0 0
PROVINSI KAB_KOTA KECAMATAN DESA_KELUR
0 0 0 0
JUMLAH_PEN ID_KEL Nama_provinsi nama_kota
0 0 0 0
nama_kecamatan POSITIF Meninggal month
0 0 0 0
geometry
0
#column names
#POSITIF (cumulative confirmed cases)
#Meninggal (cumulative death cases)
#JUMLAH_PEN (Total Population)
DJ_covid <- DJ_covid %>%
mutate(`POSITIF%` = (`POSITIF`
/`JUMLAH_PEN`)*10000) %>%
mutate(`Meninggal%` = (`Meninggal`
/`JUMLAH_PEN`)*10000)
cleaned_df <- write_rds(DJ_covid, "data/DJ_covid.rds")
DJ_covid <- read_rds("data/DJ_covid.rds")
head(DJ_covid, n=5)
#simple plot
test <- DJ_covid %>%
filter(month=="03_2020")
#tmap_mode("view")
#qtm(test,
# fill = "POSITIF%")
tmap_mode("view")+
tm_shape(test)+
tm_fill("POSITIF%", breaks = c(0, 4, 13, 53, 119, 270, 395, 3808),
legend.is.portrait = TRUE) +
tm_borders(alpha = 0.5)
#EDA? any histogram? ggplot?
#image for eval=false + title label
#hands-on and in-class exercise 3